import pandas as pd
import sqlite3 as sq3
import numpy as np
df = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv")
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = pd.read_csv(url, sep = "\t").drop(['Level5'], axis = 1)
multilevel_lookup = multilevel_lookup.rename(columns = {
"modify" : "roi",
"modify.1" : "level4",
"modify.2" : "level3",
"modify.3" : "level2",
"modify.4" : "level1"})
multilevel_lookup = multilevel_lookup[['roi', 'level4', 'level3', 'level2', 'level1']]
multilevel_lookup
| roi | level4 | level3 | level2 | level1 | |
|---|---|---|---|---|---|
| 0 | SFG_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 1 | SFG_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 2 | SFG_PFC_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 3 | SFG_PFC_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 4 | SFG_pole_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| ... | ... | ... | ... | ... | ... |
| 278 | Chroid_LVetc_L | AnteriorLateralVentricle_L | LateralVentricle_L | Ventricle | CSF |
| 279 | Chroid_LVetc_R | AnteriorLateralVentricle_R | LateralVentricle_R | Ventricle | CSF |
| 280 | IV_ventricle | IV_ventricle | IV_ventricle | Ventricle | CSF |
| 281 | ECCL_L | inf_DPWM_L | InferiorWM_L | WhiteMatter_L | Telencephalon_L |
| 282 | ECCL_R | inf_DPWM_R | InferiorWM_R | WhiteMatter_R | Telencephalon_R |
283 rows × 5 columns
id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv")
subjectData = subjectData.loc[(subjectData.type == 1) & (subjectData.level == 5) & (subjectData.id == id)]
subjectData = subjectData[['roi', 'volume']]
## Merge the subject data with the multilevel data
subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
subjectData = subjectData.assign(icv = "ICV")
subjectData = subjectData.assign(comp = subjectData.volume / np.sum(subjectData.volume))
subjectData.head()
| roi | volume | level4 | level3 | level2 | level1 | icv | comp | |
|---|---|---|---|---|---|---|---|---|
| 0 | SFG_L | 12926 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009350 |
| 1 | SFG_R | 10050 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.007270 |
| 2 | SFG_PFC_L | 12783 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009247 |
| 3 | SFG_PFC_R | 11507 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.008324 |
| 4 | SFG_pole_L | 3078 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.002227 |
icv = subjectData.groupby(['icv', 'level1'], as_index = False).agg({'volume':'sum'})
source = icv['icv'].to_list()
target = icv['level1'].to_list()
value = icv['volume'].to_list()
dic = {}
index = 1
for s in source:
if s not in dic:
dic[s] = index
index += 1
for t in target:
if t not in dic:
dic[t] = index
index += 1
source_to_idx = []
target_to_idx = []
for s in source:
source_to_idx.append(dic[s])
for t in target:
target_to_idx.append(dic[t])
import plotly.graph_objects as go
fig = go.Figure(data=[go.Sankey(
node = dict(
pad = 10,
thickness = 20,
line = dict(color = "black", width = 0.5),
label = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8'],
color = "blue"
),
link = dict(
source = source_to_idx, # indices correspond to labels, eg A1, A2, A1, B1, ...
target = target_to_idx,
value = value
)
)])
fig.update_layout(title_text="Sankey Diagram", font_size=15)
fig.show()
import plotly.express as px
con = sq3.connect("opioid.db")
annual = pd.read_sql_query("SELECT * from annual", con)
land = pd.read_sql_query("SELECT * from annual", con)
population = pd.read_sql_query("SELECT * from population", con)
con.close()
annual.head
<bound method NDFrame.head of BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips 0 1 ABBEVILLE SC 2006 877 363620 45001 1 2 ABBEVILLE SC 2007 908 402940 45001 2 3 ABBEVILLE SC 2008 871 424590 45001 3 4 ABBEVILLE SC 2009 930 467230 45001 4 5 ABBEVILLE SC 2010 1197 539280 45001 ... ... ... ... ... ... ... ... 27753 27754 NA NV 2007 447 200600 NA 27754 27755 NA NV 2008 5 2200 NA 27755 27756 NA OH 2006 23 5100 NA 27756 27757 NA PR 2006 10 17800 NA 27757 27758 NA PR 2007 2 1300 NA [27758 rows x 7 columns]>
annual["DOSAGE_UNIT"] = pd.to_numeric(annual["DOSAGE_UNIT"], downcast="float")
data = annual.groupby('year').mean()
data
| DOSAGE_UNIT | |
|---|---|
| year | |
| 2006 | 2645409.75 |
| 2007 | 2992099.50 |
| 2008 | 3252161.75 |
| 2009 | 3526038.00 |
| 2010 | 3783655.75 |
| 2011 | 4035583.00 |
| 2012 | 3993064.00 |
| 2013 | 3861751.75 |
| 2014 | 3768738.00 |
fig = px.scatter(x=data.index, y=data['DOSAGE_UNIT'])
fig.show()